install.packages("tidyverse") ## instalacja pakietu
library(tidyverse)
library(readxl)
rent <- read_excel(path = "../data-raw/rent-poznan.xlsx")
install.packages("tidyverse")
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-x86_64/contrib/4.3/tidyverse_2.0.0.tgz'
Content type 'application/x-gzip' length 428408 bytes (418 KB)
==================================================
downloaded 418 KB

The downloaded binary packages are in
    /var/folders/zs/2z4rlxv54pv7ml4jld2kccmc0000gn/T//RtmpiZknvO/downloaded_packages
rent <- as.data.frame(rent)
head(rent)

Wybieramy kolumny z funkcją select z pakietu dplyr.

rent |> select(id, price, flat_area)
rent |> select(1, 5, 10)
rent |> select(1, 5, price)
rent |> select(cena=price, pow=flat_area)
rent |> select(starts_with("p"), ends_with("id"), flat_for_students)
rent |> select(price:flat_for_students, -flat_rooms, -flat_build_year)
rent |> select(contains("ri")) 
rent |> select_if(is.character)
  1. wybrać kolumny, które mają w nazwie “ad”
rent |> select(contains("ad"))
  1. wybrać kolumny od flat_heating do flat_balcony bez kolumny ad_promo
rent |> select(flat_heating:flat_balcony, -ad_promo)

Wybieramy wiersze z funkcją filter

rent |> filter(price < 500)
rent |> filter(price < 500 & flat_rooms == 1)
rent |> filter(price < 500, flat_rooms == 1)
rent |> filter(price > 5000 | price < 200)

rent |> filter(between(price, 1000, 1200)) ## to i to poniżej jest tym samym
rent |> filter(price >= 1000, price <= 1200)

rent |> filter(price < mean(price))

rent |> filter(price < mean(price), .by = individual)

Zadania: wybranie wierszy po dzielcy: Rataje lub Winogrady

rent |> filter(quarter %in% c("Rataje", "Winogrady"))

Wstawianie nowych kolumn z funkcją mutate (transmute)

rent |> mutate(pricem2 = price/flat_area,
               pricem2_log = log(pricem2))

rent |> mutate(pricem2_gr = mean(price), .by = quarter)

rent |> mutate_at(vars(price, flat_area), log)
rent |> mutate_if(is.logical, as.numeric)

Podsumowujemy zbiór danych z funkcją summarize/se

rent |> summarise(m=mean(price), med=median(price), n=n())

Wyznaczam odsetek ogłoszeń z określonymi charakterystykami (np. odsetek ogł. z alarmami).

rent |> summarise_if(is.logical, mean)
rent |> summarise_if(is.numeric, median)
rent |> summarise_if(is.numeric, median, na.rm = TRUE)

rent |> filter(is.na(flat_floor_no)) ## is.na (czy jest brak danych)
NA

Podsumujemy wybrane kolumny z funkcją summarise_at

rent |> 
  summarise_at(vars(price, flat_area, flat_rent, flat_deposit),
               mean)

rent |> 
  summarise_at(
    vars(price, flat_area, flat_rent, flat_deposit),
    list(sr=mean, med=median)
               )

rent |> 
  summarise_at(vars(flat_balcony, flat_basement),
               list(srednia=mean))

Na wszystkich kolumnach wyznaczam określone statystyki opisowe

rent |> 
  select(price, flat_area, flat_rent, flat_deposit) |> 
  summarise_all(mean)

rent |> 
  select(price, flat_area, flat_rent, flat_deposit) |> 
  summarise_all(list(s=mean, m=median))

rent |> 
  select(price, flat_area, flat_rent, flat_deposit) |> 
  summarise_all(list(s=mean, m=median, n=length))

Ćwiczenie:

rent |> 
  filter(between(price, 700,15000), 
         between(flat_area, 15, 150)) |> 
  mutate(pricem2 = price/flat_area) |> 
  summarise_at(vars(price, flat_area, pricem2), list(s=mean, m=median))

` Wyznaczamy średnią i medianę ceny według liczby pokoi

rent |> 
  group_by(flat_rooms) |> 
  summarise_at(vars(price), list(s=mean, m=median, n = length))

rent |> 
  group_by(pokoje=flat_rooms, flat_for_students) |> 
  summarise_at(vars(price), list(s=mean, m=median, n = length)) |> 
  ungroup() |> 
  mutate(n_total = sum(n),
         p = n/sum(n)*100)
NA

Wyznaczcie liczbę ogłoszeń według następujących zmiennych:

rent |> 
  group_by(flat_for_students, individual, flat_balcony) |> 
  summarise(n = n()) |> 
  ungroup() |> 
  mutate(total = sum(n))
`summarise()` has grouped output by 'flat_for_students', 'individual'. You can override using the `.groups` argument.
rent |> 
  count(flat_for_students, individual, flat_balcony)

Dodajemy do zbioru danych średnią artymetyczną według liczby pokoi


rent |> 
  group_by(flat_rooms) |> 
  mutate(price_rooms = mean(price)) |> 
  
  group_by(flat_rooms, flat_for_students) |>
  mutate(price_rooms2 = mean(price)) |> 
  
  select(price, flat_rooms, price_rooms2)
Adding missing grouping variables: `flat_for_students`

Zadanie:

rent |> 
  filter(between(price, 700,15000), 
         between(flat_area, 15, 150),
         quarter %in% c("Rataje", "Naramowice", "Jeżyce")) |> 
  mutate(pricem2 = price/flat_area) |> 
  group_by(quarter) |> 
  summarise_at(vars(price, flat_area, pricem2), list(s=mean, m=median)) |> 
  arrange(price_s) |> 
  rename(cena = price_s)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3IgZXZhbCA9IEZBTFNFfQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKSAjIyBpbnN0YWxhY2phIHBha2lldHUKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocmVhZHhsKQpgYGAKCmBgYHtyfQpyZW50IDwtIHJlYWRfZXhjZWwocGF0aCA9ICIuLi9kYXRhLXJhdy9yZW50LXBvem5hbi54bHN4IikKcmVudCA8LSBhcy5kYXRhLmZyYW1lKHJlbnQpCmhlYWQocmVudCkKYGBgCld5YmllcmFteSBrb2x1bW55IHogZnVua2NqxIUgc2VsZWN0IHogcGFraWV0dSBkcGx5ci4KCmBgYHtyfQpyZW50IHw+IHNlbGVjdChpZCwgcHJpY2UsIGZsYXRfYXJlYSkKcmVudCB8PiBzZWxlY3QoMSwgNSwgMTApCnJlbnQgfD4gc2VsZWN0KDEsIDUsIHByaWNlKQpyZW50IHw+IHNlbGVjdChjZW5hPXByaWNlLCBwb3c9ZmxhdF9hcmVhKQpyZW50IHw+IHNlbGVjdChzdGFydHNfd2l0aCgicCIpLCBlbmRzX3dpdGgoImlkIiksIGZsYXRfZm9yX3N0dWRlbnRzKQpyZW50IHw+IHNlbGVjdChwcmljZTpmbGF0X2Zvcl9zdHVkZW50cywgLWZsYXRfcm9vbXMsIC1mbGF0X2J1aWxkX3llYXIpCnJlbnQgfD4gc2VsZWN0KGNvbnRhaW5zKCJyaSIpKSAKcmVudCB8PiBzZWxlY3RfaWYoaXMuY2hhcmFjdGVyKQpgYGAKCjEuIHd5YnJhxIcga29sdW1ueSwga3TDs3JlIG1hasSFIHcgbmF6d2llICJhZCIgCgpgYGB7cn0KcmVudCB8PiBzZWxlY3QoY29udGFpbnMoImFkIikpCmBgYAoKMi4gd3licmHEhyBrb2x1bW55IG9kIGZsYXRfaGVhdGluZyBkbyBmbGF0X2JhbGNvbnkgYmV6IGtvbHVtbnkgYWRfcHJvbW8KCgpgYGB7cn0KcmVudCB8PiBzZWxlY3QoZmxhdF9oZWF0aW5nOmZsYXRfYmFsY29ueSwgLWFkX3Byb21vKQpgYGAKCld5YmllcmFteSB3aWVyc3plIHogZnVua2NqxIUgZmlsdGVyCgpgYGB7cn0KcmVudCB8PiBmaWx0ZXIocHJpY2UgPCA1MDApCnJlbnQgfD4gZmlsdGVyKHByaWNlIDwgNTAwICYgZmxhdF9yb29tcyA9PSAxKQpyZW50IHw+IGZpbHRlcihwcmljZSA8IDUwMCwgZmxhdF9yb29tcyA9PSAxKQpyZW50IHw+IGZpbHRlcihwcmljZSA+IDUwMDAgfCBwcmljZSA8IDIwMCkKCnJlbnQgfD4gZmlsdGVyKGJldHdlZW4ocHJpY2UsIDEwMDAsIDEyMDApKSAjIyB0byBpIHRvIHBvbmnFvGVqIGplc3QgdHltIHNhbXltCnJlbnQgfD4gZmlsdGVyKHByaWNlID49IDEwMDAsIHByaWNlIDw9IDEyMDApCgpyZW50IHw+IGZpbHRlcihwcmljZSA8IG1lYW4ocHJpY2UpKQoKcmVudCB8PiBmaWx0ZXIocHJpY2UgPCBtZWFuKHByaWNlKSwgLmJ5ID0gaW5kaXZpZHVhbCkKYGBgCgpaYWRhbmlhOiB3eWJyYW5pZSB3aWVyc3p5IHBvIGR6aWVsY3k6IFJhdGFqZSBsdWIgV2lub2dyYWR5CgpgYGB7cn0KcmVudCB8PiBmaWx0ZXIocXVhcnRlciAlaW4lIGMoIlJhdGFqZSIsICJXaW5vZ3JhZHkiKSkKYGBgCgpXc3Rhd2lhbmllIG5vd3ljaCBrb2x1bW4geiBmdW5rY2rEhSBtdXRhdGUgKHRyYW5zbXV0ZSkKCmBgYHtyfQpyZW50IHw+IG11dGF0ZShwcmljZW0yID0gcHJpY2UvZmxhdF9hcmVhLAogICAgICAgICAgICAgICBwcmljZW0yX2xvZyA9IGxvZyhwcmljZW0yKSkKCnJlbnQgfD4gbXV0YXRlKHByaWNlbTJfZ3IgPSBtZWFuKHByaWNlKSwgLmJ5ID0gcXVhcnRlcikKCnJlbnQgfD4gbXV0YXRlX2F0KHZhcnMocHJpY2UsIGZsYXRfYXJlYSksIGxvZykKcmVudCB8PiBtdXRhdGVfaWYoaXMubG9naWNhbCwgYXMubnVtZXJpYykKYGBgClBvZHN1bW93dWplbXkgemJpw7NyIGRhbnljaCB6IGZ1bmtjasSFIHN1bW1hcml6ZS9zZQoKYGBge3J9CnJlbnQgfD4gc3VtbWFyaXNlKG09bWVhbihwcmljZSksIG1lZD1tZWRpYW4ocHJpY2UpLCBuPW4oKSkKYGBgCgpXeXpuYWN6YW0gb2RzZXRlayBvZ8WCb3N6ZcWEIHogb2tyZcWbbG9ueW1pIGNoYXJha3RlcnlzdHlrYW1pIChucC4gb2RzZXRlayBvZ8WCLiB6IGFsYXJtYW1pKS4KCmBgYHtyfQpyZW50IHw+IHN1bW1hcmlzZV9pZihpcy5sb2dpY2FsLCBtZWFuKQpyZW50IHw+IHN1bW1hcmlzZV9pZihpcy5udW1lcmljLCBtZWRpYW4pCnJlbnQgfD4gc3VtbWFyaXNlX2lmKGlzLm51bWVyaWMsIG1lZGlhbiwgbmEucm0gPSBUUlVFKQoKcmVudCB8PiBmaWx0ZXIoaXMubmEoZmxhdF9mbG9vcl9ubykpICMjIGlzLm5hIChjenkgamVzdCBicmFrIGRhbnljaCkKCmBgYAoKUG9kc3VtdWplbXkgd3licmFuZSBrb2x1bW55IHogZnVua2NqxIUgc3VtbWFyaXNlX2F0CgpgYGB7cn0KcmVudCB8PiAKICBzdW1tYXJpc2VfYXQodmFycyhwcmljZSwgZmxhdF9hcmVhLCBmbGF0X3JlbnQsIGZsYXRfZGVwb3NpdCksCiAgICAgICAgICAgICAgIG1lYW4pCgpyZW50IHw+IAogIHN1bW1hcmlzZV9hdCgKICAgIHZhcnMocHJpY2UsIGZsYXRfYXJlYSwgZmxhdF9yZW50LCBmbGF0X2RlcG9zaXQpLAogICAgbGlzdChzcj1tZWFuLCBtZWQ9bWVkaWFuKQogICAgICAgICAgICAgICApCgpyZW50IHw+IAogIHN1bW1hcmlzZV9hdCh2YXJzKGZsYXRfYmFsY29ueSwgZmxhdF9iYXNlbWVudCksCiAgICAgICAgICAgICAgIGxpc3Qoc3JlZG5pYT1tZWFuKSkKYGBgCk5hIHdzenlzdGtpY2gga29sdW1uYWNoIHd5em5hY3phbSBva3JlxZtsb25lIHN0YXR5c3R5a2kgb3Bpc293ZQoKYGBge3J9CnJlbnQgfD4gCiAgc2VsZWN0KHByaWNlLCBmbGF0X2FyZWEsIGZsYXRfcmVudCwgZmxhdF9kZXBvc2l0KSB8PiAKICBzdW1tYXJpc2VfYWxsKG1lYW4pCgpyZW50IHw+IAogIHNlbGVjdChwcmljZSwgZmxhdF9hcmVhLCBmbGF0X3JlbnQsIGZsYXRfZGVwb3NpdCkgfD4gCiAgc3VtbWFyaXNlX2FsbChsaXN0KHM9bWVhbiwgbT1tZWRpYW4pKQoKcmVudCB8PiAKICBzZWxlY3QocHJpY2UsIGZsYXRfYXJlYSwgZmxhdF9yZW50LCBmbGF0X2RlcG9zaXQpIHw+IAogIHN1bW1hcmlzZV9hbGwobGlzdChzPW1lYW4sIG09bWVkaWFuLCBuPWxlbmd0aCkpCmBgYArEhndpY3plbmllOgoKLSBvZ3Jhbmljem55Y2ggemJpw7NyIGRhbnljaCBkbyBjZW4geiBwcnplZHppYcWCdSBbNzAwLCAxNTAwMF0KLSBvZ3JhbmljennEhyB6YmnDs3IgZGFueWNoIGRvIHBvd2llcnN6Y2huaSB6IHByemVkemlhxYJ1IFsxNSwgMTUwXQotIHd5em5hY3p5xIcgY2VuxJkgbTIKLSBwb2RzdW1vd2HEhyBjZW7EmSwgcG93aWVyemNobmnEmSBpIGNlbsSZIG0yIHV6eXdhasSFYyDFm3JlZG5pZWogaSBtZWRpYW55CgpgYGB7cn0KcmVudCB8PiAKICBmaWx0ZXIoYmV0d2VlbihwcmljZSwgNzAwLDE1MDAwKSwgCiAgICAgICAgIGJldHdlZW4oZmxhdF9hcmVhLCAxNSwgMTUwKSkgfD4gCiAgbXV0YXRlKHByaWNlbTIgPSBwcmljZS9mbGF0X2FyZWEpIHw+IAogIHN1bW1hcmlzZV9hdCh2YXJzKHByaWNlLCBmbGF0X2FyZWEsIHByaWNlbTIpLCBsaXN0KHM9bWVhbiwgbT1tZWRpYW4pKQpgYGAKCmAKV3l6bmFjemFteSDFm3JlZG5pxIUgaSBtZWRpYW7EmSBjZW55IHdlZMWCdWcgbGljemJ5IHBva29pCgpgYGB7cn0KcmVudCB8PiAKICBncm91cF9ieShmbGF0X3Jvb21zKSB8PiAKICBzdW1tYXJpc2VfYXQodmFycyhwcmljZSksIGxpc3Qocz1tZWFuLCBtPW1lZGlhbiwgbiA9IGxlbmd0aCkpCgpyZW50IHw+IAogIGdyb3VwX2J5KHBva29qZT1mbGF0X3Jvb21zLCBmbGF0X2Zvcl9zdHVkZW50cykgfD4gCiAgc3VtbWFyaXNlX2F0KHZhcnMocHJpY2UpLCBsaXN0KHM9bWVhbiwgbT1tZWRpYW4sIG4gPSBsZW5ndGgpKSB8PiAKICB1bmdyb3VwKCkgfD4gCiAgbXV0YXRlKG5fdG90YWwgPSBzdW0obiksCiAgICAgICAgIHAgPSBuL3N1bShuKSoxMDApCgpgYGAKCld5em5hY3pjaWUgbGljemLEmSBvZ8WCb3N6ZcWEIHdlZMWCdWcgbmFzdMSZcHVqxIVjeWNoIHptaWVubnljaDoKCisgZmxhdF9mb3Jfc3R1ZGVudHMKKyBpbmRpdmlkdWFsCisgZmxhdF9iYWxjb255CgpgYGB7cn0KcmVudCB8PiAKICBncm91cF9ieShmbGF0X2Zvcl9zdHVkZW50cywgaW5kaXZpZHVhbCwgZmxhdF9iYWxjb255KSB8PiAKICBzdW1tYXJpc2UobiA9IG4oKSkgfD4gCiAgdW5ncm91cCgpIHw+IAogIG11dGF0ZSh0b3RhbCA9IHN1bShuKSkKCnJlbnQgfD4gCiAgY291bnQoZmxhdF9mb3Jfc3R1ZGVudHMsIGluZGl2aWR1YWwsIGZsYXRfYmFsY29ueSkKYGBgCgpEb2RhamVteSBkbyB6YmlvcnUgZGFueWNoICDFm3JlZG5pxIUgYXJ0eW1ldHljem7EhSB3ZWTFgnVnIGxpY3pieSBwb2tvaQoKYGBge3J9CgpyZW50IHw+IAogIGdyb3VwX2J5KGZsYXRfcm9vbXMpIHw+IAogIG11dGF0ZShwcmljZV9yb29tcyA9IG1lYW4ocHJpY2UpKSB8PiAKICAKICBncm91cF9ieShmbGF0X3Jvb21zLCBmbGF0X2Zvcl9zdHVkZW50cykgfD4KICBtdXRhdGUocHJpY2Vfcm9vbXMyID0gbWVhbihwcmljZSkpIHw+IAogIAogIHNlbGVjdChwcmljZSwgZmxhdF9yb29tcywgcHJpY2Vfcm9vbXMyKQpgYGAKClphZGFuaWU6CgotIG9ncmFuaWN6bnljaCB6YmnDs3IgZGFueWNoIGRvIGNlbiB6IHByemVkemlhxYJ1IFs3MDAsIDE1MDAwXQotIG9ncmFuaWN6ecSHIHpiacOzciBkYW55Y2ggZG8gcG93aWVyc3pjaG5pIHogcHJ6ZWR6aWHFgnUgWzE1LCAxNTBdCi0gb2dyYW5pY3p5xIcgZG8gbWllc3prYcWEIHogUmF0YWosIE5hcmFtb3dpYyBsdWIgSmXFvHljCi0gd3l6bmFjennEhyBjZW7EmSBtMgotIHpncnVwb3dhxIcgd2VkxYJ1ZyBxdWFydGVyCi0gcG9kc3Vtb3dhxIcgY2VuxJksIHBvd2llcnpjaG5pxJkgaSBjZW7EmSBtMiB1enl3YWrEhWMgxZtyZWRuaWVqIGkgbWVkaWFueQoKYGBge3J9CnJlbnQgfD4gCiAgZmlsdGVyKGJldHdlZW4ocHJpY2UsIDcwMCwxNTAwMCksIAogICAgICAgICBiZXR3ZWVuKGZsYXRfYXJlYSwgMTUsIDE1MCksCiAgICAgICAgIHF1YXJ0ZXIgJWluJSBjKCJSYXRhamUiLCAiTmFyYW1vd2ljZSIsICJKZcW8eWNlIikpIHw+IAogIG11dGF0ZShwcmljZW0yID0gcHJpY2UvZmxhdF9hcmVhKSB8PiAKICBncm91cF9ieShxdWFydGVyKSB8PiAKICBzdW1tYXJpc2VfYXQodmFycyhwcmljZSwgZmxhdF9hcmVhLCBwcmljZW0yKSwgbGlzdChzPW1lYW4sIG09bWVkaWFuKSkgfD4gCiAgYXJyYW5nZShwcmljZV9zKSB8PiAKICByZW5hbWUoY2VuYSA9IHByaWNlX3MpCmBgYAoKCgoK